In [32]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy
import scipy.cluster.hierarchy as hcluster
import numpy as np
In [26]:
# Build three synthetic 2-D point clouds to cluster.
# randn draws from the standard normal distribution (mean 0, variance 1),
# NOT from a uniform distribution over [0, 1); adding a constant to the
# samples shifts the centre of the cloud.
# A dedicated, seeded generator makes the data identical on every
# "Restart Kernel -> Run All" and leaves numpy's global RNG state untouched.
SEED = 0
rng = numpy.random.RandomState(SEED)
# N points per cluster
N = 100
# cluster 1: centred on (0, 0)
c1 = rng.randn(N, 2)
# cluster 2: centred on (5, 5)
c2 = rng.randn(N, 2) + 5
# cluster 3: centred on (10, 10)
c3 = rng.randn(N, 2) + 10
In [42]:
# Look at the raw points before any clustering is applied.
# The three clusters are stacked row-wise into one array; column 0 is
# plotted on the x axis and column 1 on the y axis.
data = np.concatenate([c1, c2, c3], axis=0)
plt.scatter(data[:, 0], data[:, 1], color='black')
# Same scale on both axes.
plt.axis("equal")
plt.show()
In [47]:
# Cluster the stacked points hierarchically.
# Important input: `thresh` is the threshold applied when the flat
# clusters are formed.
thresh = 1.5
# `criterion` selects how the flat clusters are formed. Valid values are
#   - 'inconsistent' (the default),
#   - 'distance', or
#   - 'maxclust'.
criterion = 'distance'
# One label per row of `data`, e.g. [2 2 2 ... 3 3 ... 2 .. 1 ... 1 1 1]
clusters = hcluster.fclusterdata(data, t=thresh, criterion=criterion)
# Plot the points coloured by cluster label; the title reports the
# threshold and how many distinct labels were produced.
plt.scatter(data[:, 0], data[:, 1], c=clusters)
plt.axis("equal")
n_found = len(numpy.unique(clusters))
title = "threshold: %f, number of clusters: %d" % (thresh, n_found)
plt.title(title)
plt.show()
In [ ]: